#### PREAMBLE ####

#load our packages
library(foreign)
library(plyr)
library(tidyverse)
library(olsrr)
library(nnet)
library(reshape2)
library(broom)
library(DescTools)
library(rms)
library(lmtest)
library(stargazer)
library(ggplot2)
library(readr)
library(readxl)
library(plm)
library(foreach)
library(parallel)
library(readstata13)
library(ggeffects)
library(estimatr)
library(margins)
library(MASS)
library(DataCombine)
library(extrafont)
library(texreg)

loadfonts(device = "win")
par(family = "LM Roman 10")


#### ANALYSIS ####

df <- read.csv("Source data/drug_trafficking_homicides_correlations.csv", stringsAsFactors = F, na.strings = "")

df$rh_ln <- log(df$rivalry_hom + 0.00001)
df$hom_ln <- log(df$homicides + 0.00001)
df$gh_ln <- log(df$gun_homicides + 0.00001)
df$mh_ln <- log(df$malicious_homicides + 0.00001)

#Table 1
lm0 <- lm_robust(rh_ln ~ mh_ln, data=df, clusters=state)
summary(lm0)
texreg(lm0, include.ci=F, include.fstatistic=T, include.rmse=F, file="Outputs/T1_proxy_regs.txt")

#### PLOTS FOR CPS ####

scale_x_ln <- function(...) scale_x_continuous(..., trans = scales::log_trans())
scale_y_ln <- function(...) scale_y_continuous(..., trans = scales::log_trans())


#set axis font size
nums <- 12

#### CPS PLOT 1 ####

library(lmtest)
library(sandwich)
library(ggplot2)
library(dplyr)

model <- lm(hom_ln ~ rh_ln, data = df)
vcov_clustered <- vcovHC(model, type = "HC1", cluster = ~state, data = df)

# Calculate predictions and standard errors explicitly
predictions <- predict(model, newdata = df, se.fit = TRUE, vcov. = vcov_clustered)
df$predicted = predictions$fit
# Ensure se_clustered is a vector
se_clustered <- predictions$se.fit

# Calculate confidence intervals
alpha <- 0.05
z_value <- qnorm(1 - alpha / 2)

df$lower <- df$predicted - (z_value * se_clustered)
df$upper <- df$predicted + (z_value * se_clustered)

df$predicted[df$rivalry_hom==0] <- NA_real_
df$lower[df$rivalry_hom==0] <- NA_real_
df$upper[df$rivalry_hom==0] <- NA_real_


hom <- ggplot(df, aes(x = rivalry_hom, y = homicides)) +
  geom_ribbon(aes(ymin = exp(lower), ymax = exp(upper)), fill = "grey83", alpha = 0.5) + # Adjusted for log scale
  geom_line(aes(y = exp(predicted)), color = "grey70", size = .5) + # Adjusted for log scale
  geom_point(alpha = .5) +
  scale_x_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5000), expand = c(.02, .02)) +
  scale_y_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5000), expand = c(.02, .02)) +
  coord_cartesian(xlim = c(2, 5000), ylim = c(2, 5000)) +
  theme_classic(base_size=15) +
  theme(axis.text = element_text(size = nums), axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Cartel homicides by state\n2007-2010") +
  ylab("Homicides by state\n2007-2010")

hom


h <- 5
w <- 1*h
ggsave("Outputs/plots/hom_cor.pdf", width=w, height=h)


#### CPS PLOT 2 ####

model <- lm(gh_ln ~ rh_ln, data = df)
vcov_clustered <- vcovHC(model, type = "HC1", cluster = ~state, data = df)

# Calculate predictions and standard errors explicitly
predictions <- predict(model, newdata = df, se.fit = TRUE, vcov. = vcov_clustered)
df$predicted = predictions$fit
# Ensure se_clustered is a vector
se_clustered <- predictions$se.fit

# Calculate confidence intervals
alpha <- 0.05
z_value <- qnorm(1 - alpha / 2)

df$lower <- pmax(df$predicted - (z_value * se_clustered), log(2))
df$upper <- pmin(df$predicted + (z_value * se_clustered), log(5000))

gun <- ggplot(df, aes(x = rivalry_hom, y = gun_homicides)) +
  geom_ribbon(aes(ymin = exp(lower), ymax = exp(upper)), fill = "grey83", alpha = 0.5) + # Adjusted for log scale
  geom_line(aes(y = exp(predicted)), color = "grey70", size = .5) + # Adjusted for log scale
  geom_point(alpha = .5) +
  scale_x_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5000), expand = c(.02, .02)) +
  scale_y_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5000), expand = c(.02, .02)) +
  coord_cartesian(xlim = c(2, 5000), ylim = c(2, 5000)) +
  theme_classic(base_size=15) +
  theme(axis.text = element_text(size = nums), axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Cartel homicides by state\n2007-2010")+
  ylab("Gun homicides by state\n2007-2010")

gun

h <- 5
w <- 1*h
ggsave("Outputs/plots/gh_cor.pdf", width=w, height=h)

#### CPS PLOT 3 ####

model <- lm(mh_ln ~ rh_ln, data = df)
vcov_clustered <- vcovHC(model, type = "HC1", cluster = ~state, data = df)

# Calculate predictions and standard errors explicitly
predictions <- predict(model, newdata = df, se.fit = TRUE, vcov. = vcov_clustered)
df$predicted = predictions$fit
# Ensure se_clustered is a vector
se_clustered <- predictions$se.fit

# Calculate confidence intervals
alpha <- 0.05
z_value <- qnorm(1 - alpha / 2)

df$lower <- pmax(df$predicted - (z_value * se_clustered), log(2))
df$upper <- pmin(df$predicted + (z_value * se_clustered), log(5000))

mh <- ggplot(df, aes(x = rivalry_hom, y = malicious_homicides)) +
  geom_ribbon(aes(ymin = exp(lower), ymax = exp(upper)), fill = "grey83", alpha = 0.5) + # Adjusted for log scale
  geom_line(aes(y = exp(predicted)), color = "grey70", size = .5) + # Adjusted for log scale
  geom_point(alpha = .5) +
  scale_x_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5001), expand = c(.02, .02)) +
  scale_y_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5001), expand = c(.02, .02)) +
  coord_cartesian(xlim = c(2, 5000), ylim = c(2, 5001)) +
  theme_classic(base_size=15) +
  theme(axis.text = element_text(size = nums), axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Cartel homicides by state\n2007-2010")+
  ylab("Intentional homicides by state\n2007-2010")

mh

h <- 5
w <- 1*h
ggsave("Outputs/plots/mh_cor.pdf", width=w, height=h)


#### CPS PLOT 4 - MIS PER COR ####

match <- read.csv("Outputs/Analysis all muns.csv", stringsAsFactors = F)
match$INEGI <- sprintf("%05d", match$INEGI)
match$INEGI <- as.character(match$INEGI)
match$time <- as.Date(match$time)
match$state_num <- substr(match$INEGI, 1, 2)
match <- match[,c("State", "state_num")]
match <- match %>% 
  distinct(State, .keep_all=T)
class(df$state)
match$state_num <- as.numeric(match$state_num)
match <- match[!match$State %in% c("VERACRUZ DE IGNACIO DE LA LLAVE", "COAHUILA DE ZARAGOZA"),]
df <- left_join(df, match, by=c("state"="state_num"))

library(dplyr)
library(lubridate)


mis <- read.csv("Source data/mis.csv", stringsAsFactors = F)
class(mis$time)
mis$time[mis$time=="NO ESPECIFICADO"] <- NA_character_
mis$time <- as.Date(mis$time, format = "%m/%d/%Y")
min(mis$time, na.rm=T)
library(ggplot2)
ggplot(data=mis, aes(x=mis$time))+
  geom_density()

mis_summary <- mis %>%
  mutate(year = year(as.Date(time))) %>%
  group_by(State, year) %>%
  summarise(mis_per = n(), .groups = 'drop')

df <- df %>%
  left_join(mis_summary, by = c("State", "year"))
df$mis_per[is.na(df$mis_per)] <- 0

df$mis_per_ln <- log(df$mis_per + 0.00001)


#'df' has been filtered to exclude rows where 'rh' is 0
df <- df %>% filter(rivalry_hom != 0)

model <- lm(mis_per_ln ~ rh_ln, data = df)
vcov_clustered <- vcovHC(model, type = "HC1", cluster = ~state, data = df)

# Calculate predictions and standard errors explicitly
predictions <- predict(model, newdata = df, se.fit = TRUE, vcov. = vcov_clustered)
df$predicted = predictions$fit
# Ensure se_clustered is a vector
se_clustered <- predictions$se.fit

# Calculate confidence intervals
alpha <- 0.05
z_value <- qnorm(1 - alpha / 2)

df$lower <- pmax(df$predicted - (z_value * se_clustered), log(2))
df$upper <- pmin(df$predicted + (z_value * se_clustered), log(5000))

mis_per <- ggplot(df, aes(x = rivalry_hom, y = mis_per)) +
  geom_ribbon(aes(ymin = exp(lower), ymax = exp(upper)), fill = "grey83", alpha = 0.5) + # Adjusted for log scale
  geom_line(aes(y = exp(predicted)), color = "grey70", size = .5) + # Adjusted for log scale
  geom_point(alpha = .5) +
  scale_x_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5000), expand = c(.02, .02)) +
  scale_y_log10(breaks = c(2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000), limits = c(2, 5000), expand = c(.02, .02)) +
  coord_cartesian(xlim = c(2, 5000), ylim = c(2, 5000)) +
  theme_classic(base_size=15) +
  theme(axis.text = element_text(size = nums), axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Cartel homicides by state\n2007-2010")+
  ylab("Missing persons by state\n2007-2010")

mis_per

h <- 5
w <- 1*h
ggsave("Outputs/plots/misper_cor.pdf", width=w, height=h)



#### COMBINE CPS PLOTS ####

library(patchwork)

combined_plot <- hom + gun + mh + mis_per +
  plot_layout(ncol = 2)
combined_plot

h <- 7
w <- 7.5
ggsave("Outputs/plots/Fig3_combined_cors.pdf", width=w, height=h)

#remove component plots
# List all files in the folder
remove_files <- c("Outputs/plots/gh_cor.pdf", "Outputs/plots/hom_cor.pdf", "Outputs/plots/mh_cor.pdf", "Outputs/plots/misper_cor.pdf")

# Delete all files in the folder
file.remove(remove_files)




#
